import pandas as pd
import numpy as np
import warnings
import sys
#visualization
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sktime.utils.plotting import plot_series, plot_lags, plot_correlations
#from visuals import *
#config to clean up the notebook
pd.set_option('display.max_columns', None)
pd.options.display.float_format = '{:.2f}'.format
warnings.filterwarnings('ignore')
# Read the cleaned long-format data; columns '0'/'1'/'2' hold the measure
# name ('sales' / 'onpromotion'), the product family, and the date.
df = pd.read_csv('sales_clean.csv')
df = df.set_index(['1', '2']).sort_index()
df.head()
# Work on explicit copies: the boolean-mask selections return slices, and
# calling inplace operations on a slice triggers SettingWithCopyWarning
# (hidden by the warnings filter above) and can silently fail to modify
# the frame. Plain assignment on a .copy() is unambiguous.
df_sales = df[df['0'] == 'sales'].copy()
df_onpromotion = df[df['0'] == 'onpromotion'].copy()
# Pivot sales to wide format: one column per product family, dates as rows.
df_sales = df_sales.drop('0', axis=1)
df_sales.index.rename(['family', 'date'], level=[0, 1], inplace=True)
df_sales = df_sales.unstack('family')
df_sales.columns = df_sales.columns.droplevel()
df_sales.head()
# Same reshape for the on-promotion counts.
df_onpromotion = df_onpromotion.drop('0', axis=1)
df_onpromotion.index.rename(['family', 'date'], level=[0, 1], inplace=True)
df_onpromotion = df_onpromotion.unstack('family')
df_onpromotion.columns = df_onpromotion.columns.droplevel()
# Parse the string dates into a DatetimeIndex for the time-series tooling.
df_sales.index = pd.to_datetime(df_sales.index)
df_onpromotion.index = pd.to_datetime(df_onpromotion.index)
from sktime.forecasting.model_selection import SlidingWindowSplitter
# Hold out the final 15 days as a test set; train on everything before them.
y_train, y_test = df_sales.iloc[:-15], df_sales.iloc[-15:]
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.base import ForecastingHorizon
# Absolute horizon: forecast exactly the dates present in the hold-out set.
fh = ForecastingHorizon(y_test.index, is_relative=False)
# Seasonal-naive baseline: with sp=7 it repeats the most recent weekly pattern.
forecaster = NaiveForecaster(strategy='last', sp=7)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
from sktime.performance_metrics.forecasting import MeanSquaredError
rmse = MeanSquaredError(square_root=True)
# Plot and score each product family individually, then the frame as a whole.
for family in y_train.columns:
    plot_series(y_train[family], y_test[family], y_pred[family], labels=['y_train', 'y_test', 'y_pred'])
    print('RMSE for {}: {}'.format(family, rmse(y_test[family], y_pred[family])))
print('Overall RMSE {}'.format(rmse(y_test,y_pred)))
RMSE for AUTOMOTIVE: 1.358381001891627 RMSE for BABY CARE: 0.0969352019679599 RMSE for BEAUTY: 1.6163742516734927 RMSE for BEVERAGES: 629.8133697253157 RMSE for BOOKS: 0.02191140676732531 RMSE for BREAD/BAKERY: 54.638165398252255 RMSE for CELEBRATION: 2.3688554957858448 RMSE for CLEANING: 581.5756684629596 RMSE for DAIRY: 95.19699760691029 RMSE for DELI: 41.212282499624564 RMSE for EGGS: 20.67250901534757 RMSE for FROZEN FOODS: 12.506054721096808 RMSE for GROCERY I: 791.7376935103116 RMSE for GROCERY II: 11.5325489717349 RMSE for HARDWARE: 0.19580651176327848 RMSE for HOME AND KITCHEN I: 5.854356140667163 RMSE for HOME AND KITCHEN II: 7.427266496125442 RMSE for HOME APPLIANCES: 0.15180667666099854 RMSE for HOME CARE: 78.19120871893784 RMSE for LADIESWEAR: 1.791976582463979 RMSE for LAWN AND GARDEN: 1.8161242622125413 RMSE for LINGERIE: 2.289471234711228 RMSE for LIQUOR,WINE,BEER: 22.58739890864535 RMSE for MAGAZINES: 2.4666501000439687 RMSE for MEATS: 37.824619704418026 RMSE for PERSONAL CARE: 76.6610270107757 RMSE for PET SUPPLIES: 1.4669191549337932 RMSE for PLAYERS AND ELECTRONICS: 1.7508374533172635 RMSE for POULTRY: 45.92946889127437 RMSE for PREPARED FOODS: 6.923388492262947 RMSE for PRODUCE: 227.95699803772067 RMSE for SCHOOL AND OFFICE SUPPLIES: 47.81117891504797 RMSE for SEAFOOD: 3.299214022617051 Overall RMSE 85.35586256315877
Here we have the first and simplest iteration of a predictor, using a NaiveForecaster with a seasonal period of 7 and a strategy of "last", which means that it repeats the last observed value from the same point in the weekly cycle when making its prediction. This parameter was chosen to capture some of the weekly seasonality we observed in the data.
Here I have chosen RMSE, or root mean squared error, as the metric for evaluation. One advantage of this metric is that its results are in the same unit as the target variable, which makes for good explainability downstream. It is also robust against 0 values, where something like MAPE, or mean absolute percentage error, is not. This could cause problems for us because some of the families, like books for example, have 0 values in the ground truth for the test set.
We can see the results both visually and observe the RMSE. This will give us a baseline to evaluate our future iterations of modeling and prediction. Since RMSE gives us a value in the same unit as our target variable, it represents an error in sales units, so lower will be better.
It should be noted however, that in order to actually select the model that generalizes the best to unseen data, we should implement a cross-validation strategy that will iteratively fit models and then test on portions of our training set. We can record the RMSE for each of these iterations and then take an average to see how well it does. While this is not a concept specific to time-series analysis, some special consideration needs to be applied to the splitting process when dealing with a time series.
The most important thing is we need to prevent data leakage, and we need to have consecutive dates in our training data.
We will be using a k-fold CV strategy that uses an expanding window. This would look something like this for 5 folds (k=5):
fold 0 : ++++++++***
fold 1 : ++++++++++++++++***
fold 2 : ++++++++++++++++++++++++***
fold 3 : ++++++++++++++++++++++++++++++++***
fold 4 : ++++++++++++++++++++++++++++++++++++++++***
from sklearn.model_selection import TimeSeriesSplit, KFold
#runs k fold cross validation and returns the mean of the error metric
def ts_model_cv(y_train, forecaster=None, cv=None, fh=None, metric=None, fit_fh=False):
    """Run time-series cross-validation and return the mean fold error.

    Parameters
    ----------
    y_train : pd.DataFrame
        Training data with a DatetimeIndex (one column per family).
    forecaster : sktime forecaster
        Re-fitted from scratch on each fold's training window.
    cv : sktime splitter
        e.g. ExpandingWindowSplitter; must provide ``split_loc``, which
        yields label-based (loc) train/test indices per fold.
    fh : ForecastingHorizon
        Horizon passed to ``predict`` (and to ``fit`` when ``fit_fh``).
    metric : callable
        Error metric ``metric(y_true, y_pred)`` returning a float.
    fit_fh : bool
        Set True for forecasters that require the horizon at fit time
        (e.g. some reduction forecasters).

    Returns
    -------
    float
        Mean of ``metric`` across all folds.
    """
    results = []
    for n, (train_idx, test_idx) in enumerate(cv.split_loc(y_train)):
        train = y_train.loc[train_idx]
        test = y_train.loc[test_idx]
        if fit_fh:
            forecaster.fit(train, fh=fh)
        else:
            forecaster.fit(train)
        y_pred = forecaster.predict(fh)
        # Compute the fold error once (the original evaluated the metric
        # twice per fold: once for the list and once for the print).
        fold_error = metric(test, y_pred)
        results.append(fold_error)
        print('Fold {}: {}'.format(n, fold_error))
    return np.mean(results)
Here we can see that our RMSE across the 50-fold cross-validation is a bit higher than the RMSE on our hold-out test set. In real life we aren't going to have the ground-truth values for the period we are interested in forecasting, so this cross-validation performance will be a much more reliable way to measure our model's performance than comparing performance on the test set.
Let's try a few more models.
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.trend import STLForecaster
from sktime.forecasting.compose import DirRecTimeSeriesRegressionForecaster, make_reduction
from sklearn.ensemble import GradientBoostingRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sktime.forecasting.model_selection import ExpandingWindowSplitter
# Compare several candidate forecasters under one expanding-window CV scheme.
fh = ForecastingHorizon(np.arange(1, 16))  # 15-step-ahead relative horizon
cv = ExpandingWindowSplitter(fh=fh, initial_window=42, step_length=15)
metric = MeanSquaredError(square_root=True)  # RMSE
candidates = [
    NaiveForecaster(strategy='last', sp=7),
    ExponentialSmoothing(trend='add', seasonal='add', sp=7),
    make_reduction(LGBMRegressor(max_depth=6, n_estimators=20), window_length=13),
    make_reduction(XGBRegressor(max_depth=6, n_estimators=20), window_length=13),
    make_reduction(LGBMRegressor(max_depth=7, n_estimators=52), window_length=20),
    make_reduction(XGBRegressor(max_depth=7, n_estimators=52), window_length=20),
    STLForecaster(seasonal=7, sp=7, trend=55, robust=True),
]
# Key results by the forecaster's repr so distinct hyperparameter sets
# remain distinguishable in the summary dict.
model_results = {}
for candidate in candidates:
    model_results[str(candidate)] = ts_model_cv(y_train, forecaster=candidate, cv=cv, fh=fh, metric=metric)
Fold 0: 37.97236832813313 Fold 1: 68.93214605435246 Fold 2: 60.68519663159952 Fold 3: 86.96183638161638 Fold 4: 46.24662379946465 Fold 5: 78.33349162239456 Fold 6: 80.5210725967284 Fold 7: 81.44234255104284 Fold 8: 117.94604922837367 Fold 9: 66.11299487109609 Fold 10: 133.64575259066103 Fold 11: 172.95062789835293 Fold 12: 89.18719274300629 Fold 13: 76.48868387445403 Fold 14: 89.84934450144004 Fold 15: 53.33359616900093 Fold 16: 65.80589363869711 Fold 17: 80.87042430553993 Fold 18: 166.8559483745427 Fold 19: 204.39814922796813 Fold 20: 111.7448267877748 Fold 21: 69.52191758920553 Fold 22: 108.83709051664113 Fold 23: 69.04455017278157 Fold 24: 82.17249083547424 Fold 25: 47.10083625735518 Fold 26: 72.11365612545468 Fold 27: 77.11414288764338 Fold 28: 67.16779518424063 Fold 29: 63.752811492398095 Fold 30: 87.05450818576432 Fold 31: 78.27133412838076 Fold 32: 92.54020293423127 Fold 33: 58.31296196526873 Fold 34: 101.58889851031057 Fold 35: 176.18689424808724 Fold 36: 176.96400678100608 Fold 37: 45.788669811126056 Fold 38: 68.01896850372151 Fold 39: 100.56513266894753 Fold 40: 122.113330644381 Fold 41: 74.07232940380925 Fold 42: 124.90036549123342 Fold 43: 100.44341137910979 Fold 44: 122.71145211072066 Fold 45: 80.44080776359165 Fold 46: 84.46470759682114 Fold 47: 64.76880290139374 Fold 48: 71.60745835880164 Fold 49: 37.176850502731796 Fold 0: 29.520746624005643 Fold 1: 77.38277807391059 Fold 2: 28.18263042447652 Fold 3: 93.64629486456772 Fold 4: 26.239234462422804 Fold 5: 92.03833151564399 Fold 6: 36.63183165247759 Fold 7: 78.79703468272395 Fold 8: 37.68917663999696 Fold 9: 77.03111126716091 Fold 10: 130.1425494688859 Fold 11: 233.95771982372906 Fold 12: 47.935395869583886 Fold 13: 72.38716341664438 Fold 14: 176.57668292536243 Fold 15: 61.346521600382744 Fold 16: 38.07538105523951 Fold 17: 82.96647985963796 Fold 18: 170.94764885929675 Fold 19: 91.53727594698718 Fold 20: 78.27666296993834 Fold 21: 86.31406500920409 Fold 22: 53.66790033082141 Fold 23: 82.89120101287111 
Fold 24: 42.71561625683541 Fold 25: 73.60212007860966 Fold 26: 57.91128372358087 Fold 27: 93.29264917042549 Fold 28: 67.52679156695561 Fold 29: 102.51397590660393 Fold 30: 59.01250017656453 Fold 31: 102.19646514301262 Fold 32: 72.04392509152478 Fold 33: 76.68025907516328 Fold 34: 81.44761273780234 Fold 35: 158.2848985823557 Fold 36: 222.8956078592562 Fold 37: 62.113231931323476 Fold 38: 56.49459234500989 Fold 39: 111.9937208469373 Fold 40: 55.15988784185965 Fold 41: 94.70242676391948 Fold 42: 172.0613042572896 Fold 43: 118.87789869543543 Fold 44: 115.64575908173587 Fold 45: 58.28291553876118 Fold 46: 58.5509149964444 Fold 47: 58.1621718206514 Fold 48: 62.56538080236745 Fold 49: 55.10369909826481 Fold 0: 74.7241338232219 Fold 1: 71.23601438530027 Fold 2: 66.63455470883827 Fold 3: 81.31840240502846 Fold 4: 62.6072531002815 Fold 5: 92.08254470155336 Fold 6: 72.33090906858212 Fold 7: 81.90122536641626 Fold 8: 81.24375761631416 Fold 9: 65.44658962687654 Fold 10: 132.31103926613042 Fold 11: 135.17313070592164 Fold 12: 106.21952642819954 Fold 13: 66.65829159466217 Fold 14: 63.89180601901875 Fold 15: 57.194043271569456 Fold 16: 45.079816552244 Fold 17: 78.8781692064243 Fold 18: 152.98739336405686 Fold 19: 104.51410604957034 Fold 20: 55.57375753747167 Fold 21: 68.86889440710594 Fold 22: 60.946419384013915 Fold 23: 60.56819290331203 Fold 24: 50.39617491499451 Fold 25: 49.51877088494729 Fold 26: 54.187461715142284 Fold 27: 67.22592567035016 Fold 28: 62.68097260730285 Fold 29: 74.49255598120838 Fold 30: 56.88353558784503 Fold 31: 68.32430332298541 Fold 32: 77.29166063726521 Fold 33: 61.286011379945 Fold 34: 85.49029439063229 Fold 35: 174.59067102770402 Fold 36: 156.11868313614562 Fold 37: 60.05537461597494 Fold 38: 62.925072270499435 Fold 39: 80.75379853733297 Fold 40: 107.21236415426955 Fold 41: 88.92610026085063 Fold 42: 56.698931541151204 Fold 43: 97.55989882466083 Fold 44: 81.02255816025968 Fold 45: 70.31098660349949 Fold 46: 84.29467490326196 Fold 47: 63.25185730414387 
Fold 48: 49.6453813602413 Fold 49: 41.36082625388983 Fold 0: 54.80977996214783 Fold 1: 66.23211392656316 Fold 2: 72.2894523318756 Fold 3: 76.31182482161833 Fold 4: 60.70168366326003 Fold 5: 82.82278770195276 Fold 6: 81.18075431811091 Fold 7: 83.94548427185494 Fold 8: 59.91191263730318 Fold 9: 66.86877060094046 Fold 10: 138.93003848453117 Fold 11: 144.05632697882774 Fold 12: 140.2832849005002 Fold 13: 72.48750029582936 Fold 14: 65.04137204860987 Fold 15: 54.246004823641506 Fold 16: 38.39295871101111 Fold 17: 80.57246287814787 Fold 18: 142.40694909429342 Fold 19: 105.55344041317188 Fold 20: 52.87394660616728 Fold 21: 81.07370899392104 Fold 22: 59.03501016620012 Fold 23: 66.91065624552441 Fold 24: 67.35960167400195 Fold 25: 50.94312919641326 Fold 26: 67.20640071419886 Fold 27: 74.98417981281197 Fold 28: 71.83934976093245 Fold 29: 71.08798442624683 Fold 30: 57.17865347128641 Fold 31: 76.48276559216643 Fold 32: 74.01821860382246 Fold 33: 64.35079057516303 Fold 34: 82.55389049289523 Fold 35: 157.43813112767558 Fold 36: 144.2709960730875 Fold 37: 60.769067098626216 Fold 38: 68.83745588726782 Fold 39: 98.16225211392236 Fold 40: 122.46262493677085 Fold 41: 87.18926440504788 Fold 42: 66.64864820475837 Fold 43: 95.36291194766126 Fold 44: 88.55602614487533 Fold 45: 81.25343108754834 Fold 46: 83.73115020289495 Fold 47: 63.3602815917445 Fold 48: 53.1998895702403 Fold 49: 43.430686456962995 Fold 0: 75.87262486322734 Fold 1: 89.29492883328649 Fold 2: 61.795979134773695 Fold 3: 80.04508014566929 Fold 4: 52.304854663340905 Fold 5: 86.24236861589675 Fold 6: 68.33834465441409 Fold 7: 83.07370033130637 Fold 8: 60.64937504007555 Fold 9: 66.67362788351076 Fold 10: 140.39551318309987 Fold 11: 156.70916179639005 Fold 12: 115.00544407846951 Fold 13: 71.50856777018488 Fold 14: 58.66160511240305 Fold 15: 59.90980308863825 Fold 16: 50.2553935353961 Fold 17: 73.78761346452156 Fold 18: 160.23438882421442 Fold 19: 90.7500439614552 Fold 20: 54.9106883378124 Fold 21: 71.41695981441958 Fold 22: 
49.79010312686895 Fold 23: 55.05599156184801 Fold 24: 43.754688833490974 Fold 25: 42.342474183595655 Fold 26: 61.37270356637474 Fold 27: 61.145761960280495 Fold 28: 45.456437471624 Fold 29: 66.01810781676637 Fold 30: 59.051019822677326 Fold 31: 66.81555220449304 Fold 32: 57.44948075352865 Fold 33: 61.3870152045166 Fold 34: 70.26811145540219 Fold 35: 151.05527266332743 Fold 36: 175.13546543353578 Fold 37: 58.610652691917196 Fold 38: 43.72537689800849 Fold 39: 97.54184835742853 Fold 40: 92.8920116290089 Fold 41: 77.76591884226083 Fold 42: 72.49875029910919 Fold 43: 90.22605275160498 Fold 44: 82.24746809083683 Fold 45: 69.60558216573412 Fold 46: 72.9060268035643 Fold 47: 49.81222869963842 Fold 48: 55.361699518130116 Fold 49: 42.91537781247247 Fold 0: 64.08348839945558 Fold 1: 62.41399045668362 Fold 2: 55.352235584934284 Fold 3: 78.90407020430737 Fold 4: 57.24300443565902 Fold 5: 85.10189606126877 Fold 6: 74.30193149173994 Fold 7: 81.14215930114118 Fold 8: 63.886711675113986 Fold 9: 58.01137133788293 Fold 10: 132.7209903168583 Fold 11: 155.30380334950496 Fold 12: 118.39304148427108 Fold 13: 79.3487223036532 Fold 14: 54.394916572427 Fold 15: 62.06390775530357 Fold 16: 63.32431946326353 Fold 17: 79.1133250633088 Fold 18: 155.76178568520436 Fold 19: 161.1049141065568 Fold 20: 69.76371134623182 Fold 21: 65.21655268536509 Fold 22: 61.67626029653103 Fold 23: 48.849048725241516 Fold 24: 36.06435461368767 Fold 25: 45.93507096157189 Fold 26: 70.45862300131276 Fold 27: 69.42887352014021 Fold 28: 74.59202434679663 Fold 29: 68.51354280753789 Fold 30: 56.683422841621685 Fold 31: 76.34559481892062 Fold 32: 63.00039682659467 Fold 33: 63.4382082722158 Fold 34: 66.41031246753414 Fold 35: 157.55467880064728 Fold 36: 132.53087240448096 Fold 37: 66.52479263461917 Fold 38: 66.86281810708691 Fold 39: 104.21202351169165 Fold 40: 96.662992494875 Fold 41: 76.18482527955891 Fold 42: 74.71108640808019 Fold 43: 87.52203493881241 Fold 44: 97.35124346753673 Fold 45: 66.73063691999512 Fold 46: 
74.74907645945537 Fold 47: 72.63438819171414 Fold 48: 50.946948625742756 Fold 49: 48.92718854767634 Fold 0: 37.56722316734588 Fold 1: 67.28667473621381 Fold 2: 48.89007328801562 Fold 3: 80.44811088316418 Fold 4: 43.543583904051665 Fold 5: 76.40930508467474 Fold 6: 61.949251209966945 Fold 7: 80.27774328064156 Fold 8: 55.875642574761315 Fold 9: 59.55086317850294 Fold 10: 133.88787415684735 Fold 11: 125.42926609900331 Fold 12: 178.65429061321782 Fold 13: 76.27066796790204 Fold 14: 73.52108728506688 Fold 15: 56.15489153559924 Fold 16: 56.12818621406225 Fold 17: 78.79032177013453 Fold 18: 176.39439090655947 Fold 19: 87.58101808117378 Fold 20: 116.86807871845876 Fold 21: 67.18183667835213 Fold 22: 65.79165378614249 Fold 23: 68.5102177478987 Fold 24: 63.14766066366037 Fold 25: 45.58302707813617 Fold 26: 61.238648608738856 Fold 27: 69.96788855934953 Fold 28: 58.169805400545116 Fold 29: 62.08206193484601 Fold 30: 68.3185424970567 Fold 31: 76.13357758251533 Fold 32: 74.61259740503112 Fold 33: 64.55988984992439 Fold 34: 86.19734606649563 Fold 35: 171.11781752749738 Fold 36: 147.40297007475485 Fold 37: 66.80340693793572 Fold 38: 66.5159005835431 Fold 39: 90.16228599256632 Fold 40: 100.73270841391026 Fold 41: 72.841537474245 Fold 42: 73.89008583018811 Fold 43: 93.54958590809923 Fold 44: 57.8561070080154 Fold 45: 77.55928621936887 Fold 46: 70.4966346718921 Fold 47: 70.00761426596267 Fold 48: 69.20299437009729 Fold 49: 34.868912907962894
# Mean CV RMSE per candidate model (lower is better).
print(model_results)
{'NaiveForecaster(sp=7)': 89.90201894253744, "ExponentialSmoothing(seasonal='add', sp=7, trend='add')": 85.48038855489321, 'RecursiveTabularRegressionForecaster(estimator=LGBMRegressor(max_depth=6,\n n_estimators=20),\n window_length=13)': 78.41789635077235, 'RecursiveTabularRegressionForecaster(estimator=XGBRegressor(base_score=None,\n booster=None,\n callbacks=None,\n colsample_bylevel=None,\n colsample_bynode=None,\n colsample_bytree=None,\n early_stopping_rounds=None,\n enable_categorical=False,\n eval_metric=None,\n feature_types=None,\n gamma=None,\n gpu_id=None,\n grow_policy=None,\n importance_type=None,\n interaction_constraints=None,\n learning_rate=None,\n max_bin=None,\n max_cat_threshold=None,\n max_cat_to_onehot=None,\n max_delta_step=None,\n max_depth=6,\n max_leaves=None,\n min_child_weight=None,\n missing=nan,\n monotone_constraints=None,\n n_estimators=20,\n n_jobs=None,\n num_parallel_tree=None,\n predictor=None,\n random_state=None, ...),\n window_length=13)': 80.39232012090055, 'RecursiveTabularRegressionForecaster(estimator=LGBMRegressor(max_depth=7,\n n_estimators=52),\n window_length=20)': 76.00086495501101, 'RecursiveTabularRegressionForecaster(estimator=XGBRegressor(base_score=None,\n booster=None,\n callbacks=None,\n colsample_bylevel=None,\n colsample_bynode=None,\n colsample_bytree=None,\n early_stopping_rounds=None,\n enable_categorical=False,\n eval_metric=None,\n feature_types=None,\n gamma=None,\n gpu_id=None,\n grow_policy=None,\n importance_type=None,\n interaction_constraints=None,\n learning_rate=None,\n max_bin=None,\n max_cat_threshold=None,\n max_cat_to_onehot=None,\n max_delta_step=None,\n max_depth=7,\n max_leaves=None,\n min_child_weight=None,\n missing=nan,\n monotone_constraints=None,\n n_estimators=52,\n n_jobs=None,\n num_parallel_tree=None,\n predictor=None,\n random_state=None, ...),\n window_length=20)': 79.04904378743629, 'STLForecaster(robust=True, sp=7, trend=55)': 79.31962293400193}
from sktime.forecasting.trend import STLForecaster
# Refit the CV-winning configuration (sp=7, trend=55) on the full training set.
model = STLForecaster(seasonal=7, sp=7, trend=55, robust=True)
model.fit(y_train)
# Absolute horizon: forecast exactly the hold-out dates.
fh = ForecastingHorizon(y_test.index, is_relative=False)
y_pred = model.predict(fh)
print('RMSE {}'.format(rmse(y_test,y_pred)))
RMSE 75.56295369442584
# Plot each family's STL forecast against train/test; labels added so the
# legends match the earlier NaiveForecaster plots (the original omitted them).
for col in y_train.columns:
    plot_series(y_train[col], y_test[col], y_pred[col], labels=['y_train', 'y_test', 'y_pred'])
We can see that from trying a few different models and experimenting with a few hyperparameters for the STL forecaster, we have obtained a lower CV score with the STLForecaster using sp=7 and trend=55. Recall that in the EDA we found that an sp of 28 smoothed the trend out enough to where enough of the seasonality was being caught in the decomposition. It turns out this is actually due to the trend hyperparameter, but by default it is set based off of the sp value. The sp value should be 7 to capture weekly seasonality, so the 55 value was calculated using the formula from the documentation based on the sp=28 value we used during decomposition.
We can also see, when we check the RMSE against our hold-out test set, that the performance is better than with the NaiveForecaster alone. There are many hyperparameters for STLForecaster we could continue to tune, such as using different forecasters within the process to forecast each individual component. We can also observe that a large source of error for both models was the huge surge in "SCHOOL AND OFFICE SUPPLIES" that occurred during the test set period.
# Persist the forecasts in long format for the downstream dashboard.
y_pred.unstack().to_csv('STLForecaster_pred.csv', index_label=[0,1,2])
from sktime.forecasting.model_selection import ForecastingRandomizedSearchCV
from sktime.forecasting.model_selection import SlidingWindowSplitter
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.compose import make_reduction
from sklearn.ensemble import RandomForestRegressor
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from lightgbm import LGBMRegressor
# Randomized hyperparameter search over the reduction forecaster.
rmse = MeanSquaredError(square_root=True)
regressor = LGBMRegressor()
forecaster = make_reduction(regressor)
# "estimator__" prefixes reach through to the wrapped LGBMRegressor;
# "window_length" belongs to the reduction wrapper itself.
nested_params = {"window_length": list(range(2, 21)),
                 "estimator__max_depth": list(range(5, 16)),
                 "estimator__n_estimators": list(range(20, 120))}
fh = ForecastingHorizon(np.arange(1, 16))
cv = ExpandingWindowSplitter(fh=fh, initial_window=42, step_length=15)
nrcv = ForecastingRandomizedSearchCV(forecaster, strategy="refit", cv=cv,
                                     param_distributions=nested_params,
                                     n_iter=10, random_state=42, scoring=rmse)
nrcv.fit(y_train)
# BUG FIX: fitting on a multivariate DataFrame makes sktime broadcast the
# search column-by-column, so the outer object never gets `best_params_`
# and the original prints raised AttributeError (see the traceback below).
# Guard the access and fall back to the fitted-params dict, which nests
# each column's search results.
if hasattr(nrcv, 'best_params_'):
    # Univariate fit: the search exposes its results directly.
    print(nrcv.best_params_)
    print(nrcv.best_score_)
    print(nrcv.cv_results_)
else:
    # Multivariate fit: inspect the per-column fitted searches instead.
    print(nrcv.get_fitted_params())
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[65], line 22 18 nrcv = ForecastingRandomizedSearchCV(forecaster, strategy="refit", cv=cv, 19 param_distributions=nested_params, 20 n_iter=10, random_state=42, scoring=rmse) 21 nrcv.fit(y_train) ---> 22 print(nrcv.best_params_) 23 print(nrcv.best_score_) 24 print(nrcv.cv_results_) AttributeError: 'ForecastingRandomizedSearchCV' object has no attribute 'best_params_'
# Despite the AttributeError above, the per-column searches were fitted,
# so we can still forecast and score the tuned model on the hold-out set.
y_pred = nrcv.predict(fh=fh)
print(rmse(y_test,y_pred))
69.52885618089292
# Inspect the fitted search objects; each family's best hyperparameters are
# nested under its own ForecastingRandomizedSearchCV entry.
nrcv.get_fitted_params()
{'forecasters': family AUTOMOTIVE \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family BABY CARE \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family BEAUTY \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family BEVERAGES \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family BOOKS \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family BREAD/BAKERY \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family CELEBRATION \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family CLEANING \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family DAIRY \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family DELI \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family EGGS \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family FROZEN FOODS \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family GROCERY I \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family GROCERY II \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family HARDWARE \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family HOME AND KITCHEN I \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family HOME AND KITCHEN II \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family HOME APPLIANCES \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family HOME CARE \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family LADIESWEAR \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family LAWN AND GARDEN \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family LINGERIE \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family LIQUOR,WINE,BEER \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family MAGAZINES \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family MEATS \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family PERSONAL CARE \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family PET SUPPLIES \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family PLAYERS AND ELECTRONICS \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family POULTRY \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family PREPARED FOODS \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family PRODUCE \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family SCHOOL AND OFFICE SUPPLIES \
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind...
family SEAFOOD
forecasters ForecastingRandomizedSearchCV(cv=ExpandingWind... ,
'forecasters.loc[forecasters,AUTOMOTIVE]': ForecastingRandomizedSearchCV(cv=ExpandingWindowSplitter(fh=ForecastingHorizon([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], dtype='int64', is_relative=True),
initial_window=42,
step_length=15),
forecaster=RecursiveTabularRegressionForecaster(estimator=LGBMRegressor()),
param_distributions={'estimator__max_depth': [5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15],
'estimator__n_estimators': [20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49, ...],
'window_length': [2, 3, 4, 5,
6, 7, 8, 9,
10, 11, 12,
13, 14, 15,
16, 17, 18,
19, 20]},
random_state=42,
scoring=MeanSquaredError(square_root=True)),
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator': LGBMRegressor(max_depth=7, n_estimators=52),
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__transformers': None,
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__window_length': 20,
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__best_iteration': None,
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__best_score': defaultdict(collections.OrderedDict,
{}),
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__booster': <lightgbm.basic.Booster at 0x7f5409277dc0>,
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__evals_result': None,
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__feature_importances': array([ 38, 42, 42, 48, 68, 51, 49, 46, 50, 77, 40, 37, 53,
72, 57, 34, 59, 47, 56, 113], dtype=int32),
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__feature_name': ['Column_0',
'Column_1',
'Column_2',
'Column_3',
'Column_4',
'Column_5',
'Column_6',
'Column_7',
'Column_8',
'Column_9',
'Column_10',
'Column_11',
'Column_12',
'Column_13',
'Column_14',
'Column_15',
'Column_16',
'Column_17',
'Column_18',
'Column_19'],
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__fitted': True,
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__n_features': 20,
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__n_features_in': 20,
'forecasters.loc[forecasters,AUTOMOTIVE]__best_forecaster__estimator__objective': 'regression',
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator': LGBMRegressor(max_depth=7, n_estimators=52),
'forecasters.loc[forecasters,AUTOMOTIVE]__transformers': None,
'forecasters.loc[forecasters,AUTOMOTIVE]__window_length': 20,
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__best_iteration': None,
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__best_score': defaultdict(collections.OrderedDict,
{}),
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__booster': <lightgbm.basic.Booster at 0x7f5409277dc0>,
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__evals_result': None,
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__feature_importances': array([ 38, 42, 42, 48, 68, 51, 49, 46, 50, 77, 40, 37, 53,
72, 57, 34, 59, 47, 56, 113], dtype=int32),
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__feature_name': ['Column_0',
'Column_1',
'Column_2',
'Column_3',
'Column_4',
'Column_5',
'Column_6',
'Column_7',
'Column_8',
'Column_9',
'Column_10',
'Column_11',
'Column_12',
'Column_13',
'Column_14',
'Column_15',
'Column_16',
'Column_17',
'Column_18',
'Column_19'],
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__fitted': True,
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__n_features': 20,
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__n_features_in': 20,
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__objective': 'regression',
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__n_estimators': 52,
'forecasters.loc[forecasters,AUTOMOTIVE]__estimator__max_depth': 7}